install.packages("Seurat")
library(Seurat)

L1  <-  Read10X(data.dir = './raw_data/L236-A/')
L2  <-  Read10X(data.dir = './raw_data/L236-B/')
L3  <-  Read10X(data.dir = './raw_data/L236-C/')
L4  <-  Read10X(data.dir = './raw_data/L236-D/')


umis <- L1$`Gene Expression`
htos <- L1$`Antibody Capture`
joint.bcs <- intersect(colnames(umis), colnames(htos))

# Subset RNA and HTO counts by joint cell barcodes
umis <- umis[, joint.bcs]
htos <- as.matrix(htos[, joint.bcs])
htos <- htos[c("HTO_AHH01_TotalSeqA","HTO_AHH02_TotalSeqA"),]
# Confirm that the HTO have the correct names
rownames(htos)

L1_exp <- CreateSeuratObject(counts = L1$`Gene Expression`,project = 'L236-A',min.cells = 0,min.features = 0)
L1_exp <- NormalizeData(L1_exp)
# Find and scale variable features
L1_exp <- FindVariableFeatures(L1_exp, selection.method = "mean.var.plot")
L1_exp <- ScaleData(L1_exp, features = VariableFeatures(L1_exp))
L1_exp[["HTO"]] <- CreateAssayObject(counts = htos)
L1_exp <- NormalizeData(L1_exp, assay = "HTO", normalization.method = "CLR")

L1_exp <- HTODemux(L1_exp, assay = "HTO", positive.quantile = 0.8)
#Idents(L1_exp) <- "HTO_maxID"
RidgePlot(L1_exp, assay = "HTO", features = rownames(L1_exp[["HTO"]]), ncol = 2,)

### L2
umis <- L2$`Gene Expression`
htos <- L2$`Antibody Capture`
joint.bcs <- intersect(colnames(umis), colnames(htos))

# Subset RNA and HTO counts by joint cell barcodes
umis <- umis[, joint.bcs]
htos <- as.matrix(htos[, joint.bcs])
htos <- htos[c("HTO_AHH01_TotalSeqA","HTO_AHH02_TotalSeqA"),]
# Confirm that the HTO have the correct names
rownames(htos)

L2_exp <- CreateSeuratObject(counts = L2$`Gene Expression`,project = 'L236-B',min.cells = 0,min.features = 0)
L2_exp <- NormalizeData(L2_exp)
# Find and scale variable features
L2_exp <- FindVariableFeatures(L2_exp, selection.method = "mean.var.plot")
L2_exp <- ScaleData(L2_exp, features = VariableFeatures(L2_exp))
L2_exp[["HTO"]] <- CreateAssayObject(counts = htos)
L2_exp <- NormalizeData(L2_exp, assay = "HTO", normalization.method = "CLR")

L2_exp <- HTODemux(L2_exp, assay = "HTO", positive.quantile = 0.8)
#Idents(L2_exp) <- "HTO_maxID"
RidgePlot(L2_exp, assay = "HTO", features = rownames(L2_exp[["HTO"]]), ncol = 2)


### L3
umis <- L3$`Gene Expression`
htos <- L3$`Antibody Capture`
joint.bcs <- intersect(colnames(umis), colnames(htos))

# Subset RNA and HTO counts by joint cell barcodes
umis <- umis[, joint.bcs]
htos <- as.matrix(htos[, joint.bcs])
#htos <- htos[c("HTO_AHH01_TotalSeqA","HTO_AHH02_TotalSeqA"),]
# Confirm that the HTO have the correct names
rownames(htos)

L3_exp <- CreateSeuratObject(counts = L3$`Gene Expression`,project = 'L236-C',min.cells = 0,min.features = 0)
L3_exp <- NormalizeData(L3_exp)
# Find and scale variable features
L3_exp <- FindVariableFeatures(L3_exp, selection.method = "mean.var.plot")
L3_exp <- ScaleData(L3_exp, features = VariableFeatures(L3_exp))
L3_exp[["HTO"]] <- CreateAssayObject(counts = htos)
L3_exp <- NormalizeData(L3_exp, assay = "HTO", normalization.method = "CLR")

L3_exp <- HTODemux(L3_exp, assay = "HTO", positive.quantile = 0.8)
#Idents(L3_exp) <- "HTO_maxID"
RidgePlot(L3_exp, assay = "HTO", features = rownames(L3_exp[["HTO"]]), ncol = 2)


### L4
umis <- L4$`Gene Expression`
htos <- L4$`Antibody Capture`
joint.bcs <- intersect(colnames(umis), colnames(htos))

# Subset RNA and HTO counts by joint cell barcodes
umis <- umis[, joint.bcs]
htos <- as.matrix(htos[, joint.bcs])
htos <- htos[c("HTO_AHH03_TotalSeqA","HTO_AHH04_TotalSeqA"),]
# Confirm that the HTO have the correct names
rownames(htos)

L4_exp <- CreateSeuratObject(counts = L4$`Gene Expression`,project = 'L236-D',min.cells = 0,min.features = 0)
L4_exp <- NormalizeData(L4_exp)
# Find and scale variable features
L4_exp <- FindVariableFeatures(L4_exp, selection.method = "mean.var.plot")
L4_exp <- ScaleData(L4_exp, features = VariableFeatures(L4_exp))
L4_exp[["HTO"]] <- CreateAssayObject(counts = htos)
L4_exp <- NormalizeData(L4_exp, assay = "HTO", normalization.method = "CLR")

L4_exp <- HTODemux(L4_exp, assay = "HTO", positive.quantile = 0.8)
#Idents(L4_exp) <- "HTO_maxID"
RidgePlot(L4_exp, assay = "HTO", features = rownames(L4_exp[["HTO"]]), ncol = 2)

#### merge data
mrege_data<-c(L2_exp,L3_exp,L4_exp)
cell.ids<-c('L236-A','L236-B','L236-C','L236-D')
L236<-merge(L1_exp,y=mrege_data,add.cell.ids = cell.ids, project = "L236")

# First, we will remove negative cells from the object
L236 <- subset(L236, idents = "Negative", invert = TRUE)
L236 <- subset(L236, idents = "Doublet", invert = TRUE)

saveRDS(L236,'L236.demuxlet.new.rds')
L236 <- readRDS('L236.demuxlet.new.rds')
# Add other info
L236@meta.data$Age <- NA
L236@meta.data$Diet <- NA
L236@meta.data$Tissue <- NA
L236@meta.data[(L236@meta.data$orig.ident=='L236-A'),]$Age <- 'Young'
L236@meta.data[(L236@meta.data$orig.ident=='L236-B')|(L236@meta.data$orig.ident=='L236-D'),]$Age <- 'Adult'
L236@meta.data[(L236@meta.data$orig.ident=='L236-C'),]$Age <- 'Aged'

L236@meta.data[(L236@meta.data$hash.ID=='HTO-AHH01-TotalSeqA')|(L236@meta.data$hash.ID=='HTO-AHH03-TotalSeqA'),]$Tissue <- 'Calvarial Periosteum'
L236@meta.data[(L236@meta.data$hash.ID=='HTO-AHH02-TotalSeqA')|(L236@meta.data$hash.ID=='HTO-AHH04-TotalSeqA'),]$Tissue <- 'Sagittal Suture Mesenchyme'

L236@meta.data[(L236@meta.data$hash.ID=='HTO-AHH03-TotalSeqA')|(L236@meta.data$hash.ID=='HTO-AHH04-TotalSeqA'),]$Diet <- 'Ad libitum'
L236@meta.data[(L236@meta.data$hash.ID=='HTO-AHH02-TotalSeqA'),]$Diet <- 'Intermittent Fasting'
L236@meta.data[(L236@meta.data$hash.ID=='HTO-AHH01-TotalSeqA'),]$Diet <- 'Intermittent Fasting'
L236@meta.data[(L236@meta.data$orig.ident=='L236-A'),]$Diet <- 'Ad libitum'

# Calculate a tSNE embedding of the HTO data
DefaultAssay(L236) <- "RNA"
L236[["percent.mt"]] <- PercentageFeatureSet(L236,pattern = "mt-")
fivenum(L236$percent.mt)
p1 <- VlnPlot(object = L236, features = c("nFeature_RNA", "nCount_RNA", "percent.mt",'percent.rb'), ncol = 3, pt.size = 0,group.by = 'orig.ident') 
p1
p1 <- VlnPlot(object = L236, features = c("nFeature_RNA", "nCount_RNA", "percent.mt",'percent.rb'), ncol = 3, pt.size = 0,group.by = 'hash.ID')
p1
L236 <- subset(L236, subset = nFeature_RNA > 500 & nFeature_RNA < 5000 & percent.mt <5)
L236 <- NormalizeData(L236,normalization.method = "LogNormalize", scale.factor = 10000)
L236 <- FindVariableFeatures(L236,selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(L236)
L236 <- ScaleData(L236,features = all.genes)
saveRDS(L236,'L236.scale.rds')

L236 <- RunPCA(L236, features =VariableFeatures(object = L236))
#check PCA result
print(L236[["pca"]], dims = 1:20,nfeatures = 3)

#PCA scatter；
DimPlot(L236, reduction = "pca")+NoLegend()
#PCA heatmap；
DimHeatmap(L236, dims = 1:20, cells = 2000,balanced = TRUE)




### divide into 2 tissue
CP <- subset(L236,subset=Tissue=='Calvarial Periosteum')
SSM <- subset(L236,subset=Tissue=='Sagittal Suture Mesenchyme')

################ CP
CP <- NormalizeData(CP,normalization.method = "LogNormalize", scale.factor = 10000)
CP <- FindVariableFeatures(CP,selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(CP)
CP <- ScaleData(CP,features = all.genes)

CP <- RunPCA(CP, features =VariableFeatures(object = CP))
#check PCA result
print(CP[["pca"]], dims = 1:20,nfeatures = 3)

#PCA scatter；
DimPlot(CP, reduction = "pca")+NoLegend()
#PCA heatmap；
DimHeatmap(CP, dims = 1:20, cells = 2000,balanced = TRUE)

ElbowPlot(CP)
library(pheatmap)

### check different markers
genes =c("Lepr","Prrx1","Cd200","Thy1","Ctsk","Acta2","Gli1","Mcam") #SSC-like markers
genes =c("Alpl","Mme","Slc44a1","Ror2","Spp1","Bglap","Sp7","Ibsp")
genes =c( 'Pecam1', 'Emcn', 'Cd34')## Endo makrer
genes =c("Wipi1","Mlx","42795","Gdi1","Zdhhc6","Stam","Prag1","Coro1a",  "Arg1","Mgl2","Tmem26","Rnase2a","Mrc1","Egr2","Flt1","Chil3") ### M1 & M2
genes=c('Cxcr4','Itga4','Flt1','Il1b', 'Il6','Tnf','Arg1', 'Il10','Chil3','Mmp9')  ##Pro-angiogenic neutrophils markers


dir.create('CP/cluster')
for (dim  in c(5:20)) {
  for (res in c(0.4,0.5,0.6,0.7,0.8,0.9,1.0)) {
    #find clusters
    CP <- FindNeighbors(CP, dims = 1:dim)
    CP <- FindClusters(CP, resolution = res)
    #  heatmap showing clusters with marker genes
    mycounts <- as.data.frame(AverageExpression(CP,slot = 'scale.data')$RNA,)
    group_list<-c(colnames(mycounts))
    col<-data.frame(Type=group_list)
    col$Type<-factor(col$Type,levels = c(colnames(mycounts)))
    rownames(col)<-colnames(mycounts)
    bks <- seq(-2.1, 2.1, by = 0.1)
    a<-colorRampPalette(c("blue", "white","red"))(length(bks)-1)
    p<-pheatmap(mycounts[ rownames(mycounts)%in% genes,],cluster_row = T,cluster_col=T,show_colnames =T,show_rownames=T,
                annotation_legend=T,annotation_col = col,scale = 'none',color = a,breaks = bks)
    p
    ggsave(paste0('CP/cluster/',dim,'_',res,'.png'),p)
  }
}



### use the code above to repeat SSM clusters and heatmap

# Use the code below for DEGs
getwd()
setwd("/Users/k1773283/OneDrive - King's College London/RNAseq/scRNAseq/Suture")
suture <- readRDS(file = "SSM.rds")

# Check which metadata Yin's CP.rds object already has
suture@meta.data
library(Seurat)
Idents(suture) <- "seurat_clusters"
levels(suture)
DimPlot(suture, reduction = "umap", group.by = "seurat_clusters", label = T)


head(suture@meta.data)
# Add combined metadata for easy filtering between samples
suture$Age_Diet <- paste(suture$Age, suture$Diet, sep = "_")
suture$Age_Diet <- factor(suture$Age_Diet, levels = c("Young_Ad libitum","Adult_Ad libitum","Adult_Intermittent Fasting","Aged_Ad libitum","Aged_Intermittent Fasting"))

suture$Cluster_Age_Diet <- paste(suture$seurat_clusters, suture$Age_Diet, sep = "_")
suture$Cluster_Age_Diet

# Save the object with new metadata
saveRDS(suture, "SSM_JR")

# Work out the cell types in the population
FeaturePlot(suture, features = c("Ptprc","Prrx1"), pt.size = 0.2) + DimPlot(suture, reduction = "umap", group.by = "seurat_clusters", label = T)

# SSC markers
VlnPlot(suture, features = c("Prrx1","Thy1","Ctsk","Lepr","Gli1","Axin2","S100a4"), group.by = "seurat_clusters",pt.size = 0.2)
FeatureScatter(suture, feature1 = "Col1a1", feature2 = "Ptprc", group.by = "seurat_clusters",shuffle = T)

VlnPlot(suture, features = c("Col1a1","Col1a2","Ctsk","Lepr","Gli1","Axin2","S100a4"), group.by = "seurat_clusters",pt.size = 0.2)

# CD45 markers
VlnPlot(suture, features = c("Ptprc"), group.by = "seurat_clusters",pt.size = 0.2)

# Osteo markers c9,2,10
VlnPlot(suture, features = c("Spp1","Mme"), group.by = "seurat_clusters",pt.size = 0.2)

# Adipo markers c12
VlnPlot(suture, features = c("Cd36","Fabp4"), group.by = "seurat_clusters",pt.size = 0.2)

# Find Defining DEGs regardless of condition for each cluster to help with identification
Idents(suture) <- suture$seurat_clusters
c0smarkers <- FindMarkers(suture, ident.1 = "0", ident.2 = NULL, verbose = TRUE)
c1smarkers <- FindMarkers(suture, ident.1 = "1", ident.2 = NULL, verbose = TRUE)
c2smarkers <- FindMarkers(suture, ident.1 = "2", ident.2 = NULL, verbose = TRUE)
c3smarkers <- FindMarkers(suture, ident.1 = "3", ident.2 = NULL, verbose = TRUE)
c4smarkers <- FindMarkers(suture, ident.1 = "4", ident.2 = NULL, verbose = TRUE)
c5smarkers <- FindMarkers(suture, ident.1 = "5", ident.2 = NULL, verbose = TRUE)
c6smarkers <- FindMarkers(suture, ident.1 = "6", ident.2 = NULL, verbose = TRUE)
c7smarkers <- FindMarkers(suture, ident.1 = "7", ident.2 = NULL, verbose = TRUE)
c8smarkers <- FindMarkers(suture, ident.1 = "8", ident.2 = NULL, verbose = TRUE)
c9smarkers <- FindMarkers(suture, ident.1 = "9", ident.2 = NULL, verbose = TRUE)
c10smarkers <- FindMarkers(suture, ident.1 = "10", ident.2 = NULL, verbose = TRUE)
c11smarkers <- FindMarkers(suture, ident.1 = "11", ident.2 = NULL, verbose = TRUE)
c12smarkers <- FindMarkers(suture, ident.1 = "12", ident.2 = NULL, verbose = TRUE)

# Give the clusters a broad 'Type' classification for later grouping (Eg Prrx1/Osteo/Immune)
Idents(suture) <- suture$seurat_clusters
levels(suture)
# suture <- RenameIdents(object = suture, `0` = "Prrx1", `1` = "Prrx1",`2` = "CD45", `3` = "Prrx1",`4` = "Endo",`5` = "CD45",`6` = "CD45",`7` = "CD45",`8` = "CD45",`9` = "Fibro",`10` = "CD45",`11` = "Adipo",`12` = "CD45")
suture$Type_Age_Diet <- paste(suture$Type, suture$Age_Diet, sep = "_")

# DEGs in Suture c10 between conditions
Idents(suture) <- suture$Cluster_Age_Diet
c10s_Aged.IFvsAL <- FindMarkers(suture, ident.1 = "10_Aged_Intermittent Fasting", ident.2 = "10_Aged_Ad libitum", verbose = TRUE)
c10s_Aged.IFvsAL$Genes <- rownames(c10s_Aged.IFvsAL)
library(openxlsx)
write.xlsx(c10s_Aged.IFvsAL, "c10s_Aged.IFvsAL.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

Idents(suture) <- suture$Cluster_Age_Diet
c10s_YoungALvsAgedAL <- FindMarkers(suture, ident.1 = "10_Young_Ad libitum", ident.2 = "10_Aged_Ad libitum", verbose = TRUE)
library(openxlsx)
write.xlsx(c10s_YoungALvsAgedAL, "c10s_YoungALvsAgedAL.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

Idents(suture) <- suture$Cluster_Age_Diet
c10s_YoungALvsAdultAL <- FindMarkers(suture, ident.1 = "10_Young_Ad libitum", ident.2 = "10_Adult_Ad libitum", verbose = TRUE)
library(openxlsx)
write.xlsx(c10s_YoungALvsAdultAL, "c10s_YoungALvsAdultAL.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

Idents(suture) <- suture$Cluster_Age_Diet
c10s_AdultALvsAgedAL <- FindMarkers(suture, ident.1 = "10_Adult_Ad libitum", ident.2 = "10_Aged_Ad libitum", verbose = TRUE)
library(openxlsx)
write.xlsx(c10s_AdultALvsAgedAL, "c10s_AdultALvsAgedAL.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

Idents(suture) <- suture$Cluster_Age_Diet
c10s_AdultIFvsAdultAL <- FindMarkers(suture, ident.1 = "10_Adult_Intermittent Fasting", ident.2 = "10_Adult_Ad libitum", verbose = TRUE)
library(openxlsx)
write.xlsx(c10s_AdultIFvsAdultAL, "c10s_AdultIFvsAdultAL.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

Idents(suture) <- suture$Cluster_Age_Diet
c10s_AgedIFvsAgedAL <- FindMarkers(suture, ident.1 = "10_Aged_Intermittent Fasting", ident.2 = "10_Aged_Ad libitum", verbose = TRUE)
library(openxlsx)
write.xlsx(c10s_AgedIFvsAgedAL, "c10s_AgedIFvsAgedAL.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# Intersection of conserved upregulated genes throughout aging SUTURE SENESCENCE
Suturec10YoungvsAdultdegs <- read_excel(file.choose())
Suturec10AdultvsAgeddegs <- read_excel(file.choose())
Suturec10YoungvsAgeddegs <- read_excel(file.choose())

Suturec10YoungvsAdultdegsDOWN <- Suturec10YoungvsAdultdegs$...1[(Suturec10YoungvsAdultdegs$avg_log2FC<0)]
Suturec10AdultvsAgeddegsDOWN <- Suturec10AdultvsAgeddegs$...1[(Suturec10AdultvsAgeddegs$avg_log2FC<0)]
Suturec10YoungvsAgeddegsDOWN <- Suturec10YoungvsAgeddegs$...1[(Suturec10YoungvsAgeddegs$avg_log2FC<0)]

e <- intersect(Suturec10YoungvsAdultdegsDOWN, Suturec10AdultvsAgeddegsDOWN)
SUTconservedagingdegs <- intersect(e, Suturec10YoungvsAgeddegsDOWN)
SUTconservedagingdegs
f <- intersect(SUTconservedagingdegs,rownames(c10s_AgedIFvsAgedAL[(c10s_AgedIFvsAgedAL$avg_log2FC<0) & (c10s_AgedIFvsAgedAL$p_val < 0.05),]))
g <- intersect(SUTconservedagingdegs,rownames(c10s_AdultIFvsAdultAL[(c10s_AdultIFvsAdultAL$avg_log2FC<0) & (c10s_AdultIFvsAdultAL$p_val < 0.05),]))
h <- intersect(f,g)

rownames(c10s_AgedIFvsAgedAL[(c10s_AgedIFvsAgedAL$avg_log2FC<0) & (c10s_AgedIFvsAgedAL$p_val < 0.05),])

citation()

# Intersection of conserved upregulated genes throughout aging PERIOSTEUM SENESCENCE
PeriosteumPrrx1YoungvsAdultdegs <- read_excel(file.choose())
PeriosteumPrrx1AdultvsAgeddegs <- read_excel(file.choose())
PeriosteumPrrx1YoungvsAgeddegs <- read_excel(file.choose())

PeriosteumPrrx1YoungvsAdultdegsDOWN <- as.character(PeriosteumPrrx1YoungvsAdultdegs[(PeriosteumPrrx1YoungvsAdultdegs$avg_log2FC<0),1])
PeriosteumPrrx1AdultvsAgeddegsDOWN <- as.character(PeriosteumPrrx1AdultvsAgeddegs[(PeriosteumPrrx1AdultvsAgeddegs$avg_log2FC<0),1])
PeriosteumPrrx1YoungvsAgeddegsDOWN <- as.character(PeriosteumPrrx1YoungvsAgeddegs[(PeriosteumPrrx1YoungvsAgeddegs$avg_log2FC<0),1])


class(PeriosteumPrrx1AdultvsAgeddegsDOWN)
class(PeriosteumPrrx1YoungvsAdultdegsDOWN)
class(PeriosteumPrrx1YoungvsAgeddegsDOWN)

e <- intersect(PeriosteumPrrx1YoungvsAdultdegsDOWN, PeriosteumPrrx1AdultvsAgeddegsDOWN)
PERIconservedagingdegs <- intersect(e, PeriosteumPrrx1YoungvsAgeddegsDOWN)
PERIconservedagingdegs

# =======================================================
# Conserved Aging VENN DIAGRAM FIGURES
# https://venn.bio-spring.top/using-ggvenndiagram#changing-palette
# =======================================================

if (!require(devtools)) install.packages("devtools")
devtools::install_github("gaospecial/ggVennDiagram")
library("ggVennDiagram")
suturevenngenes <- list(AdultvsYoung=Suturec10YoungvsAdultdegsDOWN,
          AgedvsAdult=Suturec10AdultvsAgeddegsDOWN,
          AgedvsYoung=Suturec10YoungvsAgeddegsDOWN)
ggVennDiagram(suturevenngenes, label_alpha = 0.5) + scale_fill_distiller(palette = "Reds", direction = 1) +
  scale_color_brewer(palette = "Accent")

periosteumvenngenes <- list(AdultvsYoung=PeriosteumPrrx1YoungvsAdultdegsDOWN,
                        AgedvsAdult=PeriosteumPrrx1AdultvsAgeddegsDOWN,
                        AgedvsYoung=PeriosteumPrrx1YoungvsAgeddegsDOWN)
ggVennDiagram(periosteumvenngenes, label_alpha = 0.5) + scale_fill_distiller(palette = "Reds", direction = 1) +
  scale_color_brewer(palette = "Accent")


# Comparison with Diet-modulated DEGs
Suturec10AdIFvsAdALdegs <- read_excel(file.choose())
Suturec10YAgIFvsAgALdegs <- read_excel(file.choose())

Suturec10AdIFvsAdALdegsDOWN <- Suturec10AdIFvsAdALdegs$...1[(Suturec10AdIFvsAdALdegs$avg_log2FC<0) & (degs$p_val<0.05)]
Suturec10YAgIFvsAgALdegsDOWN <- Suturec10YAgIFvsAgALdegs$...1[(Suturec10YAgIFvsAgALdegs$avg_log2FC<0) & (degs$p_val<0.05)]

SUTconserveddietdegs <- intersect(Suturec10AdIFvsAdALdegsDOWN, Suturec10YAgIFvsAgALdegsDOWN)
SUTconservedaging_dietdegs <- intersect(SUTconservedagingdegs, SUTconserveddietdegs)



Idents(suture) <- "seurat_clusters"
DimPlot(subset(suture,subset = seurat_clusters == "10")) +
  FeaturePlot(subset(suture,subset = seurat_clusters == "10"), features = c("Ctsk","Gli1","Axin2","Prrx1"))

# ================================================================================
# Look for heterogeneity within c10 by reclustering
suturec10 <- subset(suture,subset = seurat_clusters == "10")
suturec10 <- RunPCA(suturec10, features =VariableFeatures(object = suturec10))
print(suturec10[["pca"]], dims = 1:20,nfeatures = 3)
DimPlot(suturec10, reduction = "pca")+NoLegend()
ElbowPlot(suturec10)
suturec10 <- FindNeighbors(suturec10, dims = 1:5)
suturec10 <- FindClusters(suturec10, resolution = 0.3)
DimPlot(suturec10)
Idents(suturec10) <- "seurat_clusters"
suturec10.0markers <- FindMarkers(suturec10, ident.1 = "0", ident.2 = NULL, verbose = TRUE)
suturec10.1markers <- FindMarkers(suturec10, ident.1 = "1", ident.2 = NULL, verbose = TRUE)
suturec10.2markers <- FindMarkers(suturec10, ident.1 = "2", ident.2 = NULL, verbose = TRUE)
suturec10.3markers <- FindMarkers(suturec10, ident.1 = "3", ident.2 = NULL, verbose = TRUE)

suturec10.0markers$Genes <- rownames(suturec10.0markers)
suturec10.1markers$Genes <- rownames(suturec10.1markers)
suturec10.2markers$Genes <- rownames(suturec10.2markers)
suturec10.3markers$Genes <- rownames(suturec10.3markers)


write.xlsx(suturec10.0markers, "suturec10.0markers.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)


FeaturePlot(suture, features = "Ptprc")
FeaturePlot(suture, features = "Ptprc")
FeaturePlot(suture, features = "Ptprc")
Idents(suture) <- "seurat_clusters"
VlnPlot(suture, features = c("Tek","Cdh5","Emcn","Pecam1","Cd34","Kdr","Icam1","Anxa2"), pt.size = 0.6)
VlnPlot(suture, features = c("Runx2"), pt.size = 0.6)

y <- FindMarkers(suture, ident.1 = "4", ident.2 = c("0","1","2","3","5","6","7","8","9","11"), verbose = TRUE)
y$genes <- rownames(y)
library(dplyr)
library(data.table)
md <- suturec10@meta.data %>% as.data.table
# the resulting md object has one "row" per cell
## count the number of cells per unique combinations of "Sample" and "seurat_clusters"
cell_table <- md[, .N, by = c("Age_Diet", "seurat_clusters")]
library(openxlsx)
write.xlsx(cell_table, "suturec10 subclustering.xlsx")


FeaturePlot(suturec10, features = c("Dbp","Hlf","Tef"), split.by = "Age_Diet")

Idents(suture) <- suture$Age_Diet
Aged.IFvsAL <- FindMarkers(suture, ident.1 = "Aged_Intermittent Fasting", ident.2 = "Aged_Ad libitum", verbose = TRUE)


library(openxlsx)
write.xlsx(Prrx1_Aged.IFvsAL, "Prrx1_Aged.IFvsAL.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)
Prrx1_Aged.IFvsAL$Genes <- rownames(Prrx1_Aged.IFvsAL)

# DEGs in Prrx1_Adult between diets
Idents(suturec10) <- suturec10$Age_Diet
suturec10.AdultIFvsAdultAL <- FindMarkers(suturec10, ident.1 = "Adult_Intermittent Fasting", ident.2 = "Adult_Ad libitum", verbose = TRUE)
suturec10.AdultIFvsAdultAL$Genes <- rownames(suturec10.AdultIFvsAdultAL)
write.xlsx(suturec10.AdultIFvsAdultAL, "suturec10.AdultIFvsAdultAL.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# DEGs in Prrx1 between Adult AL and Aged AL
Idents(periosteum) <- periosteum$Cluster_Age_Diet
w <- FindMarkers(periosteum, ident.1 = "3_Adult_Ad libitum", ident.2 = "3_Aged_Ad libitum", verbose = TRUE)
w$Genes <- rownames(w)
write.xlsx(Prrx1.AdultvsAged, "Prrx1.AdultvsAged.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# DEGs in Prrx1 between Young AL and Aged AL
Idents(periosteum) <- periosteum$Type_Age_Diet
Prrx1.YoungvsAged <- FindMarkers(periosteum, ident.1 = "Prrx1_Young_Ad libitum", ident.2 = "Prrx1_Aged_Ad libitum", verbose = TRUE)
Prrx1.YoungvsAged$Genes <- rownames(Prrx1.YoungvsAged)
write.xlsx(Prrx1.YoungvsAged, "Prrx1.YoungvsAged.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# DEGs in Prrx1 between Young AL and Adult AL
Idents(periosteum) <- periosteum$Type_Age_Diet
Prrx1.YoungvsAdult <- FindMarkers(periosteum, ident.1 = "Prrx1_Young_Ad libitum", ident.2 = "Prrx1_Adult_Ad libitum", verbose = TRUE)
Prrx1.YoungvsAdult$Genes <- rownames(Prrx1.YoungvsAdult)
write.xlsx(Prrx1.YoungvsAdult, "Prrx1.YoungvsAdult.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# DEGs in Prrx1 between Young AL and Aged IF
Idents(periosteum) <- periosteum$Type_Age_Diet
Prrx1.YoungvsAgedIF <- FindMarkers(periosteum, ident.1 = "Prrx1_Young_Ad libitum", ident.2 = "Prrx1_Aged_Intermittent Fasting", verbose = TRUE)
Prrx1.YoungvsAgedIF$Genes <- rownames(Prrx1.YoungvsAgedIF)
write.xlsx(Prrx1.YoungvsAgedIF, "Prrx1.YoungvsAgedIF.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# DEGs in 0 and 1, all conditions combined
Idents(periosteum) <- periosteum$seurat_clusters
Cluster0vs1.allconditions <- FindMarkers(periosteum, ident.1 = "0", ident.2 = "1", verbose = TRUE)
Cluster0vs1.allconditions$Genes <- rownames(Cluster0vs1.allconditions)
write.xlsx(Cluster0vs1.allconditions, "Cluster0vs1.allconditions.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# What DEGs define c11, c13
Idents(periosteum) <- periosteum$seurat_clusters
# Adipo - expresses CD36 and Fabp4 relative to Prrx1+
c11markers <- FindMarkers(periosteum, ident.1 = 11, ident.2 = c("0","1","3"), verbose = TRUE)

# Bcell markers Ly6d/Cd79a+b and Ptprc/Prrx1 overlap?
c13markers <- FindMarkers(periosteum, ident.1 = 13, ident.2 = NULL, verbose = TRUE)
c13markers <- FindMarkers(periosteum, ident.1 = 13, ident.2 = c("0","1","3"), verbose = TRUE)

FeaturePlot(periosteum, features = c("Dbp","Tef","Hlf"), split.by = "Age_Diet", pt.size = 0.2)
FeaturePlot(periosteum, features = c("Prrx1","Ptprc","Nfil3"), pt.size = 0.2)
FeaturePlot(periosteum, features = c("Smpd3","Adra1b"), split.by = "Age_Diet", pt.size = 0.2)
FeaturePlot(periosteum, features = c("Nanog","Sox2","Klf4","Oct4","Acta2"), pt.size = 0.2)

# SSC markers
VlnPlot(suture, features = c("Prrx1","Thy1","Ctsk","Lepr","Gli1"), group.by = "seurat_clusters",pt.size = 0.2)

# Osteo markers
VlnPlot(suture, features = c("Spp1","Mme","Alpl",""), group.by = "seurat_clusters",pt.size = 0.2)

# Adipo markers c4/11
VlnPlot(periosteum, features = c("Cd36","Fabp4"), group.by = "seurat_clusters",pt.size = 0.2)

VlnPlot(periosteum, features = genes, group.by = "seurat_clusters",pt.size = 0.2)
Idents(periosteum) <- "Type"
DimPlot(subset(x = periosteum, subset = Type == "Prrx1"), group.by = "seurat_clusters", split.by = "Age_Diet", label = T)

periosteum$seurat_clusters
# Save the object with new metadata
saveRDS(periosteum, "CP_JR")

# Can't run UCell until R updates to 4.2.1 ================================================

# if (!require("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install(version = "3.15")
# sessionInfo()
# 
# BiocManager::install("BiocParallel")
# remotes::install_github("carmonalab/UCell", ref="v1.3")
# library(Seurat)
# library(UCell)

# ==========================================
# Reading Dbp binding genes D-box from https://www.nature.com/articles/s42003-019-0522-3#MOESM4

library(readxl)
my_data <- read_excel(file.choose())

# Pull the column containing gene names into a list
dbp_sites_list <- my_data$`gene_name(distance from TSS)`

# Separate them out if multiple genes separated by column
dbp_sites_list <- unlist(strsplit(dbp_sites_list,","))

# Remove NAs
dbp_sites_list <- dbp_sites_list[!is.na(dbp_sites_list)]

# Remove brackets and anything inside them
dbp_sites_list <- gsub(r"{\s*\([^\)]+\)}","",dbp_sites_list)

# Remove duplicates
dbp_sites_list <- unique(dbp_sites_list)

# Write them to an excel file for copying to GOterm websites
write.xlsx(as.data.frame(dbp_sites_list),"dbp_sites_list.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# Which positive significant DEGs are under D-box promoters?
intersect(rownames(Prrx1_Aged.IFvsAL[Prrx1_Aged.IFvsAL$p_val<0.05 & Prrx1_Aged.IFvsAL$avg_log2FC>0,]), dbp_sites_list)
dbp_sites_list[2644:2649]


# Process the literature-collated PLZF hMSC target genes into mouse format

library(readxl)
plzf_genes_raw <- read_excel(file.choose())
plzf_genes <- plzf_genes_raw$genes
plzf_genes <- tolower(plzf_genes[!is.na(plzf_genes)])
plzf_genes2 <- plzf_genes_raw$genes2
plzf_genes2 <- tolower(plzf_genes2[!is.na(plzf_genes2)])

plzf_genes_list <- c(plzf_genes, plzf_genes2)
plzf_genes_list <- unique(plzf_genes_list)

library(stringr)
plzf_genes_list <- str_to_title(plzf_genes_list)
plzf_genes_list <- unlist(strsplit(plzf_genes_list,"/"))
library(openxlsx)
write.xlsx(as.data.frame(plzf_genes_list), "plzf_genes_list.xlsx",sheetName="Data", colNames=T,rowNames=T,showNA = F, Append = TRUE)

VlnPlot(periosteum, features = "nFeature_RNA",group.by = "Age_Diet")
saveRDS(periosteum,'CP_JR.rds')

periosteum$correlation_nFeature_RNA.
periosteum$d

VlnPlot(periosteum, features = c("Gli1"), group.by = "Age_Diet", pt.size = 0.6)

# Volcanoplot to display top DEGs
# https://bioconductor.org/packages/devel/bioc/vignettes/EnhancedVolcano/inst/doc/EnhancedVolcano.html

install.packages("devtools")
devtools::install_github('kevinblighe/EnhancedVolcano')
library(EnhancedVolcano)
 
periosteum$Type_Age_Diet
DimPlot(periosteum, group.by = "Type")
Idents(suturec10) <- "Age_Diet"

vol1 <- FindMarkers(suturec10, ident.1 = "Aged_Intermittent Fasting", ident.2 = "Aged_Ad libitum", verbose = TRUE)
EnhancedVolcano(vol1,
                lab = rownames(vol1),
                x = 'avg_log2FC',
                y = 'p_val',
                selectLab = c("Ccl11","Duox2","Dbp","Tef","Hlf","Tnfaip6","Ccl7","Ptx3","Zbtb16","Mmp14","Vcan"),
                pCutoff = 5e-2,
                FCcutoff = 0.0,
                pointSize = 2.0,
                labSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.75)+
  # ggplot2::coord_cartesian(xlim=c(-1.5, 1.5)) +
  # ggplot2::scale_x_continuous(breaks=seq(-1.5,1.5, 0.5))+
  coord_flip()
  # ggplot2::coord_cartesian(ylim=c(0, 6.25)) +
  # ggplot2::scale_y_continuous(breaks=seq(0,6.25, 0.5))
                

# DEGs Prrx1 AgedIF vs AgedAL
Idents(periosteum) <- "seurat_clusters"
sub.periosteum <- subset(periosteum, idents = c("0","1","3"))
Idents(sub.periosteum) <- "Age_Diet"
vol2 <- FindMarkers(sub.periosteum, ident.1 = "Aged_Intermittent Fasting", ident.2 = "Aged_Ad libitum", verbose = TRUE)
EnhancedVolcano(vol2,
                lab = rownames(vol2),
                x = 'avg_log2FC', y = 'p_val_adj',
                selectLab = c("Ccl11","Duox2","Dbp","Tef","Hlf","Tnfaip6","Ccl7","Ptx3","Zbtb16","Mmp14","Vcan"),
                pCutoff = 0.05,
                FCcutoff = 0.25,
                pointSize = 4.0,
                labSize = 6.0,
                drawConnectors = TRUE,
                widthConnectors = 0.75)+
  ggplot2::coord_cartesian(xlim=c(-1.5, 1.5)) +
  ggplot2::scale_x_continuous(breaks=seq(-1.5,1.5, 0.5))+
  ggplot2::coord_cartesian(ylim=c(0, 6.25)) +
  ggplot2::scale_y_continuous(breaks=seq(0,6.25, 0.5))+
  coord_flip()

EnhancedVolcano(Suturec10YAgIFvsAgALdegs,
                lab = rownames(Suturec10YAgIFvsAgALdegs),
                x = 'avg_log2FC', y = 'p_val',
                selectLab = c("Sirt1","Kat2b","Csnk1e","Ezh2","Fbxw11","Ucp","Dbp","Mki67"),
                pCutoff = 0.05,
                FCcutoff = 0.25,
                pointSize = 4.0,
                labSize = 6.0,
                drawConnectors = TRUE,
                widthConnectors = 0.75)+
  # ggplot2::coord_cartesian(xlim=c(-1.5, 1.5)) +
  # ggplot2::scale_x_continuous(breaks=seq(-1.5,1.5, 0.5))+
  # ggplot2::coord_cartesian(ylim=c(0, 6.25)) +
  # ggplot2::scale_y_continuous(breaks=seq(0,6.25, 0.5))+
  coord_flip()

EnhancedVolcano(volbvt,
                lab = rownames(volbvt),
                x = 'avg_log2FC', y = 'p_val_adj',
                selectLab = c("Col3a1","Col1a2","Pcolce","Sparc","Cdon","Gas1","Lama2","Malat1","Igf1","Itm2a","Htra1","Aebp1","Serpinf1","Duox2","Ptx3","Ccl7","Wnt2"),
                pCutoff = 0.05,
                FCcutoff = 0.25,
                pointSize = 4.0,
                labSize = 6.0,
                drawConnectors = TRUE,
                widthConnectors = 0.75)+
  ggplot2::coord_cartesian(xlim=c(-2, 2)) +
  ggplot2::scale_x_continuous(breaks=seq(-2,2, 1))+
  # ggplot2::coord_cartesian(ylim=c(0, 6.25)) +
  # ggplot2::scale_y_continuous(breaks=seq(0,6.25, 0.5))+
  coord_flip()

library(dplyr)
volsutAdALvsAgAL <- Suturec10AdultvsAgeddegs %>%
  mutate(p_val_adj = p_val)
EnhancedVolcano(Suturec10AdultvsAgeddegs,
                lab = rownames(Suturec10AdultvsAgeddegs),
                x = 'avg_log2FC', y = 'p_val_adj',
                selectLab = c("Wnt2"),
                pCutoff = 1,
                FCcutoff = 0.25,
                pointSize = 4.0,
                labSize = 6.0,
                drawConnectors = TRUE,
                widthConnectors = 0.75)+
  # ggplot2::coord_cartesian(xlim=c(-2, 2)) +
  # ggplot2::scale_x_continuous(breaks=seq(-2,2, 1))+
  # ggplot2::coord_cartesian(ylim=c(0, 6.25)) +
  # ggplot2::scale_y_continuous(breaks=seq(0,6.25, 0.5))+
  coord_flip()


library(ggrepel)
library("readxl")
getwd()
setwd("/Users/k1773283/OneDrive - King's College London/RNAseq/scRNAseq/Periosteum")

volcano1 <- read_excel("Prrx1.YoungvsAged.xlsx")
label_list <- c("Col3a1","Sparc","Col1a1","Col1a2","Col5a2","Rpl13","Rpl8","Rps6","Rpl9","Rpl5","Lrp6") #Young vs Aged PERI

# volcano1 <- read_excel("c10s_YoungALvsAgedAL.xlsx")
# label_list <- c("Col3a1","Sparc","Col1a1","Col1a2","Col5a1","Rps18","Rpl10","Rps5","Rpl12","Wnt2") #Young vs Aged SUT

# volcano1 <- read_excel("Prrx1.AdultvsAged.xlsx")
# label_list <- c("Ndufa4","Ndufa7","Ndufb10","Ndufa2","Ndufs7","Wnt2","Rps20","Rps2","Rpl9","Rps24","Rps19") #Adult vs Aged PERI

# volcano1 <- read_excel("c10s_AdultALvsAgedAL.xlsx")
# label_list <- c( "Ndufa12","Ndufs7","Ndufv3","Ndufa8","Wnt2","Rpl9","Rps2","Rpl11") #Adult vs Aged SUT

volcano1 <- read_excel("PeriosteumBottomvsTop.xlsx") #Bottom vs Top
label_list <- c("Col3a1","Col1a2","Pcolce","Sparc","Cdon","Gas1","Lama2","Malat1","Igf1","Itm2a","Htra1","Aebp1","Serpinf1","Duox2","Ptx3","Ccl7","Wnt2")

volcano1 <- read_excel("Prrx1_Aged.IFvsAL.xlsx") #PERI IF Aged
label_list <- c("Ccl11","Duox2","Dbp","Tef","Hlf","Tnfaip6","Ccl7","Ptx3","Zbtb16","Mmp14","Vcan")

volcano1 <- read_excel("c10s_AgedIFvsAgedAL.xlsx") #SUT IF Aged
label_list <- c("Sirt1","Kat2b","Csnk1e","Ezh2","Fbxw11","Ucp","Dbp","Mki67")

volcano1$diffexpressed <- "ns"
volcano1$diffexpressed[volcano1$avg_log2FC > 0 & volcano1$p_val < 0.05] <- "UP p < 0.05"
volcano1$diffexpressed[volcano1$avg_log2FC < 0 & volcano1$p_val < 0.05] <- "DN p < 0.05"
volcano1$diffexpressed[volcano1$avg_log2FC > 0 & volcano1$p_val_adj < 0.05] <- "UP FDR < 0.05"
volcano1$diffexpressed[volcano1$avg_log2FC < 0 & volcano1$p_val_adj < 0.05] <- "DN FDR < 0.05"
volcano1$label <- NA
volcano1$label[volcano1$...1 %in% label_list] <- volcano1$...1[volcano1$...1 %in% label_list]
# mycolors <- c("cyan4","darkslategray2","azure3", "brown2", "darksalmon")
mycolors <- c("darkslategray2","azure3","darksalmon")
ggplot(data=volcano1, aes(x=avg_log2FC, y=-log10(p_val),col=diffexpressed, label=label)) + geom_point(alpha = 1) + theme_minimal() + geom_text_repel(nudge_x = 0.3, colour = "black")+ scale_colour_manual(values = mycolors)+  coord_flip() 
# Export pdf at 4 x 8
